1 Loading

# Reading the post-processed datasets (no report is printed)
latmx2.mset <- phenomis::reading(
  ProMetIS::post_processed_dir.c(),
  report.c = "none"
)

# Keeping only (and ordering by) the sets returned by 'ProMetIS::sets.vc()'
latmx2.mset <- latmx2.mset[, ProMetIS::sets.vc()]

2 Splitting LAT and MX2

Discarding features with either:

  • NAs > 20%

  • variance < 1e-5

  • proteomics: imputation > 20% in both conditions

# Building one MultiDataSet per gene, each restricted to the "WT" and
# corresponding gene samples (the gene name is printed as a progress message)
gene_mset.ls <- lapply(
  ProMetIS::genes.vc(),
  function(gene.c) {
    message(gene.c)
    ProMetIS::subsetting(latmx2.mset, genes.vc = c("WT", gene.c))
  }
)
## LAT
## Discarded 33 features: Haematology...Complete.blood.count..CBC.on.the.Advia.120..BASO...., Haematology...Complete.blood.count..CBC.on.the.Advia.120..EOSINO...., Haematology...Complete.blood.count..CBC.on.the.Advia.120..LUC...., Haematology...Complete.blood.count..CBC.on.the.Advia.120..LYMPHO...., Haematology...Complete.blood.count..CBC.on.the.Advia.120..MONO...., Haematology...Complete.blood.count..CBC.on.the.Advia.120..NEUTRO...., Haematology...Complete.blood.count..CBC.on.the.Advia.120.CHCM..g.dL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.CHDW..g.dL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.EOSINO..x10E03.cells.µL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.EOSINO.BRUT..x10E03.cells.µL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.HCT...., Haematology...Complete.blood.count..CBC.on.the.Advia.120.HGB..g.dL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.HGB.brut..g.dL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.Hte.brut...., Haematology...Complete.blood.count..CBC.on.the.Advia.120.LUC..x10E03.cells.µL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.LUC.BRUT..x10E03.cells.µL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.LYMPHO..x10E03.cells.µL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.LYMPHO.BRUT..x10E03.cells.µL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.MCH..pg., Haematology...Complete.blood.count..CBC.on.the.Advia.120.MCHC..g.dL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.MCV..fL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.MONO..x10E03.cells.µL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.MONO.BRUT..x10E03.cells.µL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.MPV..fL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.NEUTRO..x10E03.cells.µL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.NEUTRO.BRUT..x10E03.cells.µL., 
Haematology...Complete.blood.count..CBC.on.the.Advia.120.PLT..x10E03.cells.µL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.PLT.brut..x10E03.cells.µL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.RBC..x10E06.cells.µL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.RBC.brut..x10E06.cells.µL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.RDW...., Haematology...Complete.blood.count..CBC.on.the.Advia.120.WBC..x10E03.cells.µL., Haematology...Complete.blood.count..CBC.on.the.Advia.120.WBC.brut..x10E03.cells.µL.
## Nb of discard. feat. in 'preclinical': nas_zerovar: 33, overimputed: 0
## Nb of discard. feat. in 'proteomics_liver': nas_zerovar: 0, overimputed: 89
## Nb of discard. feat. in 'proteomics_plasma': nas_zerovar: 0, overimputed: 27
## Nb of discard. feat. in 'metabolomics_liver_c18hypersil_pos': nas_zerovar: 0, overimputed: 0
## Nb of discard. feat. in 'metabolomics_liver_hilic_neg': nas_zerovar: 0, overimputed: 0
## Nb of discard. feat. in 'metabolomics_plasma_c18hypersil_pos': nas_zerovar: 0, overimputed: 0
## Nb of discard. feat. in 'metabolomics_plasma_hilic_neg': nas_zerovar: 0, overimputed: 0
## Nb of discard. feat. in 'metabolomics_plasma_c18acquity_pos': nas_zerovar: 0, overimputed: 0
## Nb of discard. feat. in 'metabolomics_plasma_c18acquity_neg': nas_zerovar: 0, overimputed: 0
## MX2
## Discarded 2 features: Eye.Morphology.OCT.right.anterior.chamber.depth, Eye.Morphology.OCT.right.corneal.thickness
## Discarded 3 samples: W621f, W623f, W633f
## Nb of discard. feat. in 'preclinical': nas_zerovar: 2, overimputed: 0
## Nb of discard. feat. in 'proteomics_liver': nas_zerovar: 0, overimputed: 97
## Nb of discard. feat. in 'proteomics_plasma': nas_zerovar: 0, overimputed: 24
## Nb of discard. feat. in 'metabolomics_liver_c18hypersil_pos': nas_zerovar: 0, overimputed: 0
## Nb of discard. feat. in 'metabolomics_liver_hilic_neg': nas_zerovar: 0, overimputed: 0
## Nb of discard. feat. in 'metabolomics_plasma_c18hypersil_pos': nas_zerovar: 0, overimputed: 0
## Nb of discard. feat. in 'metabolomics_plasma_hilic_neg': nas_zerovar: 0, overimputed: 0
## Nb of discard. feat. in 'metabolomics_plasma_c18acquity_pos': nas_zerovar: 0, overimputed: 0
## Nb of discard. feat. in 'metabolomics_plasma_c18acquity_neg': nas_zerovar: 0, overimputed: 0
# Naming each element after its gene so that it can be retrieved by name
gene_mset.ls <- setNames(gene_mset.ls, ProMetIS::genes.vc())

3 Univariate hypothesis testing

# Univariate hypothesis testing for each gene-specific MultiDataSet.
# The test results are stored by 'phenomis::hypotesting' in the feature
# metadata (fData) of each dataset; the updated MultiDataSet is written back
# into 'gene_mset.ls'.
for (gene.c in ProMetIS::genes.vc()) {
  
  message(gene.c)
  
  gene.mset <- gene_mset.ls[[gene.c]]
  
  if (gene.c == "MX2") {
    # all sets: performing 'limma' 2 ways testing for gene and sex
    
    gene.mset <- phenomis::hypotesting(gene.mset,
                                       test.c = "limma2ways",
                                       factor_names.vc = c("gene", "sex"),
                                       factor_levels.ls = list(factor1.vc = c("WT", gene.c),
                                                               factor2.vc = ProMetIS::sex.vc()),
                                       signif_maxprint.i = 10,
                                       title.c = gene.c,
                                       report.c = "none")
    
  } else if (gene.c == "LAT") {
    # 'proteomics_liver' set: performing the 'limma' testing for gene in the male and female subsets, and 'limma' testing for sex in the LAT and WT subsets
    # all other sets: performing 'limma' 2 ways testing for gene and sex
    
    protliv.eset <- gene.mset[["proteomics_liver"]]
    
    # feature metadata progressively enriched with the subset-specific results
    protliv_fda.df <- Biobase::fData(protliv.eset)
    
    ## 'proteomics_liver': 'limma' testing for gene in the male and female subsets
    
    for (sex.c in ProMetIS::sex.vc()) {
      
      protlivsex.eset <- ProMetIS::subsetting(protliv.eset,
                                              set.c = "proteomics_liver",
                                              genes.vc = c("WT", "LAT"),
                                              sex.vc = sex.c)
      
      protlivsex.eset <- phenomis::hypotesting(protlivsex.eset,
                                               test.c = "limma",
                                               factor_names.vc = "gene",
                                               factor_levels.ls = list(factor1.vc = c("WT", gene.c)),
                                               signif_maxprint.i = 10,
                                               title.c = paste0("proteomics_liver, ", sex.c),
                                               report.c = "none")
      
      protlivsex.df <- Biobase::fData(protlivsex.eset)
      
      # 'drop = FALSE': keeping a data frame (with row names) even if a single
      # column matches, as required by the merge on row names below
      limmasex.df <- protlivsex.df[, grep("limma", colnames(protlivsex.df)),
                                   drop = FALSE]
      
      # tagging the columns with the sex of the subset
      colnames(limmasex.df) <- gsub("limma_gene_",
                                    paste0("limma", sex.c, "_"),
                                    colnames(limmasex.df))
      
      # 'all = TRUE': features missing from the subset results are kept
      # (their test columns are filled with NA)
      protliv_fda.df <- merge(protliv_fda.df,
                              limmasex.df,
                              by = 0, all = TRUE, sort = FALSE)
      rownames(protliv_fda.df) <- protliv_fda.df[, "Row.names"]
      protliv_fda.df[, "Row.names"] <- NULL
      
    }
    
    stopifnot(identical(sort(rownames(protliv_fda.df)),
                        sort(Biobase::featureNames(protliv.eset))))
    
    Biobase::fData(protliv.eset) <- protliv_fda.df[Biobase::featureNames(protliv.eset), ]
    
    ## 'proteomics_liver': 'limma' testing for sex in the LAT and WT subsets
    
    # NB: the loop variable is deliberately distinct from the outer 'gene.c',
    # which is still needed after this loop (2 ways testing and final storage)
    for (geno.c in c("WT", "LAT")) {
      
      protlivgene.eset <- protliv.eset[, Biobase::pData(protliv.eset)[, "gene"] == geno.c]
      
      protlivgene.eset <- ProMetIS::subsetting(protlivgene.eset,
                                               set.c = "proteomics_liver",
                                               genes.vc = geno.c,
                                               sex.vc = ProMetIS::sex.vc())
      
      protlivgene.eset <- phenomis::hypotesting(protlivgene.eset,
                                                test.c = "limma",
                                                factor_names.vc = "sex",
                                                factor_levels.ls = list(factor1.vc = ProMetIS::sex.vc()),
                                                signif_maxprint.i = 10,
                                                title.c = paste0("proteomics_liver, ", geno.c),
                                                report.c = "none")
      
      protlivgene.df <- Biobase::fData(protlivgene.eset)
      
      # selecting only the new 'limma_sex_' columns (the fData already
      # contains the sex-specific 'limma' columns added above)
      limmagene.df <- protlivgene.df[, grep("limma_sex_",
                                            colnames(protlivgene.df)),
                                     drop = FALSE]
      
      # tagging the columns with the genotype of the subset
      colnames(limmagene.df) <- gsub("limma_sex_",
                                     paste0("limma", geno.c, "_"),
                                     colnames(limmagene.df))
      
      protliv_fda.df <- merge(protliv_fda.df,
                              limmagene.df,
                              by = 0, all = TRUE, sort = FALSE)
      rownames(protliv_fda.df) <- protliv_fda.df[, "Row.names"]
      protliv_fda.df[, "Row.names"] <- NULL
      
    }
    
    stopifnot(identical(sort(rownames(protliv_fda.df)),
                        sort(Biobase::featureNames(protliv.eset))))
    
    Biobase::fData(protliv.eset) <- protliv_fda.df[Biobase::featureNames(protliv.eset), ]
    
    ## all other sets: 'limma2ways' testing for gene and sex
    
    gene.mset <- gene.mset[, setdiff(names(gene.mset), "proteomics_liver")]
    
    gene.mset <- phenomis::hypotesting(gene.mset,
                                       test.c = "limma2ways",
                                       factor_names.vc = c("gene", "sex"),
                                       factor_levels.ls = list(factor1.vc = c("WT", gene.c),
                                                               factor2.vc = ProMetIS::sex.vc()),
                                       signif_maxprint.i = 10,
                                       title.c = gene.c,
                                       report.c = "none")
    
    # including the 'proteomics_liver' dataset back
    
    gene.mset <- MultiDataSet::add_eset(gene.mset,
                                        protliv.eset,
                                        dataset.type = "proteomics_liver",
                                        GRanges = NA,
                                        overwrite = TRUE,
                                        warnings = FALSE)
    
    # re-ordering
    
    gene.mset <- gene.mset[,
                           ProMetIS::sets.vc()[ProMetIS::sets.vc() %in% names(gene.mset)]]
    
  }
  
  gene_mset.ls[[gene.c]] <- gene.mset
  
}
## LAT
## Nb of discard. feat. in 'proteomics_liver': nas_zerovar: 0, overimputed: 14
## Nb of discard. feat. in 'proteomics_liver': nas_zerovar: 0, overimputed: 4

## Nb of discard. feat. in 'proteomics_liver': nas_zerovar: 0, overimputed: 20

## Nb of discard. feat. in 'proteomics_liver': nas_zerovar: 0, overimputed: 5

## MX2

4 Principal Component Analysis

Score plot colored according to genotype or sex.

# PCA of every dataset, for each gene-specific MultiDataSet
for (gene.c in ProMetIS::genes.vc()) {

  message(gene.c)

  # building the models (no figure is produced at this stage)
  gene_mset.pca <- ropls::opls(gene_mset.ls[[gene.c]], fig.pdfC = "none")

  # score plot colored according to the genotype
  ropls::plot(gene_mset.pca,
              typeVc = "x-score",
              plotPhenoDataC = "gene",
              parPaletteVc = ProMetIS::palette.vc()[c(gene.c, "WT")])

  # score plot colored according to the sex
  ropls::plot(gene_mset.pca,
              typeVc = "x-score",
              plotPhenoDataC = "sex",
              parPaletteVc = ProMetIS::palette.vc()[rev(ProMetIS::sex.vc())])

  # getting the MultiDataSet back from the models and storing it
  gene.mset <- ropls::getMset(gene_mset.pca)
  gene_mset.ls[[gene.c]] <- gene.mset

}
## LAT
## 
## 
## Building the model for the 'preclinical' dataset:
## PCA
## 28 samples x 203 variables
## standard scaling of predictors
## 245 (4%) NAs
##       R2X(cum) pre ort
## Total    0.504   4   0
## 
## 
## Building the model for the 'proteomics_liver' dataset:
## PCA
## 28 samples x 2098 variables
## standard scaling of predictors
##       R2X(cum) pre ort
## Total     0.51   5   0
## 
## 
## Building the model for the 'proteomics_plasma' dataset:
## PCA
## 24 samples x 419 variables
## standard scaling of predictors
##       R2X(cum) pre ort
## Total    0.535   4   0
## 
## 
## Building the model for the 'metabolomics_liver_c18hypersil_pos' dataset:
## PCA
## 28 samples x 5665 variables
## standard scaling of predictors
##       R2X(cum) pre ort
## Total    0.548   4   0
## 
## 
## Building the model for the 'metabolomics_liver_hilic_neg' dataset:
## PCA
## 28 samples x 2866 variables
## standard scaling of predictors
##       R2X(cum) pre ort
## Total    0.545   4   0
## 
## 
## Building the model for the 'metabolomics_plasma_c18hypersil_pos' dataset:
## PCA
## 28 samples x 4788 variables
## standard scaling of predictors
## 1 (0%) NAs
##       R2X(cum) pre ort
## Total    0.525   5   0
## 
## 
## Building the model for the 'metabolomics_plasma_hilic_neg' dataset:
## PCA
## 28 samples x 3131 variables
## standard scaling of predictors
##       R2X(cum) pre ort
## Total    0.529   5   0
## 
## 
## Building the model for the 'metabolomics_plasma_c18acquity_pos' dataset:
## PCA
## 28 samples x 6104 variables
## standard scaling of predictors
## 76 (0%) NAs
##       R2X(cum) pre ort
## Total    0.525   6   0
## 
## 
## Building the model for the 'metabolomics_plasma_c18acquity_neg' dataset:
## PCA
## 28 samples x 1584 variables
## standard scaling of predictors
##       R2X(cum) pre ort
## Total    0.544   6   0

## MX2

## 
## 
## Building the model for the 'preclinical' dataset:
## PCA
## 29 samples x 234 variables
## standard scaling of predictors
## 407 (6%) NAs
##       R2X(cum) pre ort
## Total    0.538   5   0
## 
## 
## Building the model for the 'proteomics_liver' dataset:
## PCA
## 29 samples x 2090 variables
## standard scaling of predictors
##       R2X(cum) pre ort
## Total    0.514   5   0
## 
## 
## Building the model for the 'proteomics_plasma' dataset:
## PCA
## 25 samples x 422 variables
## standard scaling of predictors
##       R2X(cum) pre ort
## Total    0.512   4   0
## 
## 
## Building the model for the 'metabolomics_liver_c18hypersil_pos' dataset:
## PCA
## 29 samples x 5665 variables
## standard scaling of predictors
##       R2X(cum) pre ort
## Total    0.559   5   0
## 
## 
## Building the model for the 'metabolomics_liver_hilic_neg' dataset:
## PCA
## 29 samples x 2866 variables
## standard scaling of predictors
##       R2X(cum) pre ort
## Total     0.55   5   0
## 
## 
## Building the model for the 'metabolomics_plasma_c18hypersil_pos' dataset:
## PCA
## 29 samples x 4788 variables
## standard scaling of predictors
## 1 (0%) NAs
##       R2X(cum) pre ort
## Total    0.508   5   0
## 
## 
## Building the model for the 'metabolomics_plasma_hilic_neg' dataset:
## PCA
## 29 samples x 3131 variables
## standard scaling of predictors
##       R2X(cum) pre ort
## Total    0.546   5   0
## 
## 
## Building the model for the 'metabolomics_plasma_c18acquity_pos' dataset:
## PCA
## 29 samples x 6104 variables
## standard scaling of predictors
## 77 (0%) NAs
##       R2X(cum) pre ort
## Total    0.535   6   0
## 
## 
## Building the model for the 'metabolomics_plasma_c18acquity_neg' dataset:
## PCA
## 29 samples x 1584 variables
## standard scaling of predictors
##       R2X(cum) pre ort
## Total    0.544   6   0

5 OPLS-DA

# OPLS-DA of the 'gene' factor on every dataset, for each gene-specific
# MultiDataSet
for (gene.c in ProMetIS::genes.vc()) {

  message(gene.c)

  # 1 predictive and 1 orthogonal component (no figure at this stage)
  gene_mset.oplsda <- ropls::opls(gene_mset.ls[[gene.c]],
                                  "gene",
                                  predI = 1,
                                  orthoI = 1,
                                  fig.pdfC = "none")

  # permutation diagnostics, then score plot colored by genotype
  ropls::plot(gene_mset.oplsda, typeVc = "permutation")
  ropls::plot(gene_mset.oplsda,
              typeVc = "x-score",
              parPaletteVc = ProMetIS::palette.vc()[c(gene.c, "WT")])

  # getting the MultiDataSet back from the models and storing it
  gene.mset <- ropls::getMset(gene_mset.oplsda)
  gene_mset.ls[[gene.c]] <- gene.mset

}
## LAT
## 
## 
## Building the model for the 'preclinical' dataset:
## OPLS-DA
## 28 samples x 203 variables and 1 response
## standard scaling of predictors and response(s)
## 245 (4%) NAs
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y pQ2
## Total    0.161    0.834   0.268 0.215   1   1  0.1 0.1
## No model was included for the 'preclinical' dataset because pQ2 was above 5%.
## 
## 
## Building the model for the 'proteomics_liver' dataset:
## OPLS-DA
## 28 samples x 2098 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.273    0.876   0.603 0.186   1   1 0.25 0.05
## 
## 
## Building the model for the 'proteomics_plasma' dataset:
## OPLS-DA
## 24 samples x 419 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.312    0.915   0.527 0.155   1   1 0.05 0.05
## 
## 
## Building the model for the 'metabolomics_liver_c18hypersil_pos' dataset:
## OPLS-DA
## 28 samples x 5665 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.279    0.937   0.843 0.132   1   1 0.05 0.05
## 
## 
## Building the model for the 'metabolomics_liver_hilic_neg' dataset:
## OPLS-DA
## 28 samples x 2866 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.341    0.921   0.826 0.148   1   1 0.05 0.05
## 
## 
## Building the model for the 'metabolomics_plasma_c18hypersil_pos' dataset:
## OPLS-DA
## 28 samples x 4788 variables and 1 response
## standard scaling of predictors and response(s)
## 1 (0%) NAs
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.205    0.909   0.435 0.159   1   1 0.05 0.05
## 
## 
## Building the model for the 'metabolomics_plasma_hilic_neg' dataset:
## OPLS-DA
## 28 samples x 3131 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.203    0.948   0.599 0.121   1   1 0.05 0.05
## 
## 
## Building the model for the 'metabolomics_plasma_c18acquity_pos' dataset:
## OPLS-DA
## 28 samples x 6104 variables and 1 response
## standard scaling of predictors and response(s)
## 76 (0%) NAs
##       R2X(cum) R2Y(cum) Q2(cum)  RMSEE pre ort pR2Y  pQ2
## Total    0.128    0.973    0.51 0.0863   1   1 0.05 0.05
## 
## 
## Building the model for the 'metabolomics_plasma_c18acquity_neg' dataset:
## OPLS-DA
## 28 samples x 1584 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.179     0.91   0.384 0.158   1   1  0.3 0.05
## No model has been built for the 'preclinical' dataset and thus no plot can be displayed.

## No model has been built for the 'preclinical' dataset and thus no plot can be displayed.

## MX2

## 
## 
## Building the model for the 'preclinical' dataset:
## OPLS-DA
## 29 samples x 234 variables and 1 response
## standard scaling of predictors and response(s)
## 407 (6%) NAs
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y pQ2
## Total    0.219    0.839   0.264 0.212   1   1  0.1 0.1
## No model was included for the 'preclinical' dataset because pQ2 was above 5%.
## 
## 
## Building the model for the 'proteomics_liver' dataset:
## OPLS-DA
## 29 samples x 2090 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.307    0.933   0.746 0.137   1   1  0.1 0.05
## 
## 
## Building the model for the 'proteomics_plasma' dataset:
## OPLS-DA
## 25 samples x 422 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.286    0.904   0.577 0.165   1   1 0.05 0.05
## 
## 
## Building the model for the 'metabolomics_liver_c18hypersil_pos' dataset:
## OPLS-DA
## 29 samples x 5665 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.264    0.832   0.515 0.216   1   1  0.1 0.05
## 
## 
## Building the model for the 'metabolomics_liver_hilic_neg' dataset:
## OPLS-DA
## 29 samples x 2866 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.166    0.918   0.542 0.151   1   1 0.05 0.05
## 
## 
## Building the model for the 'metabolomics_plasma_c18hypersil_pos' dataset:
## OPLS-DA
## 29 samples x 4788 variables and 1 response
## standard scaling of predictors and response(s)
## 1 (0%) NAs
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.153    0.925   0.495 0.145   1   1 0.05 0.05
## 
## 
## Building the model for the 'metabolomics_plasma_hilic_neg' dataset:
## OPLS-DA
## 29 samples x 3131 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.234    0.919    0.49  0.15   1   1 0.05 0.05
## 
## 
## Building the model for the 'metabolomics_plasma_c18acquity_pos' dataset:
## OPLS-DA
## 29 samples x 6104 variables and 1 response
## standard scaling of predictors and response(s)
## 77 (0%) NAs
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.161    0.935    0.44 0.134   1   1 0.05 0.05
## 
## 
## Building the model for the 'metabolomics_plasma_c18acquity_neg' dataset:
## OPLS-DA
## 29 samples x 1584 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.144    0.909   0.289  0.16   1   1  0.2 0.05
## No model has been built for the 'preclinical' dataset and thus no plot can be displayed.

## No model has been built for the 'preclinical' dataset and thus no plot can be displayed.

6 Feature selection

# Feature selection (biosigner) for the 'gene' factor on every dataset, for
# each gene-specific MultiDataSet
for (gene.c in ProMetIS::genes.vc()) {

  message(gene.c)

  # fixed seed (123) for the selection
  gene_mset.biosign <- biosigner::biosign(gene_mset.ls[[gene.c]],
                                          "gene",
                                          seedI = 123,
                                          plotTierMaxC = "A")

  # getting the MultiDataSet back from the signatures and storing it
  gene.mset <- biosigner::getMset(gene_mset.biosign)
  gene_mset.ls[[gene.c]] <- gene.mset

}
## LAT
## 
## 
## Selecting the features for the 'preclinical' dataset:
## 
## 
## Selecting the features for the 'proteomics_liver' dataset:
## Significant features from 'S' groups:
##                                  plsda randomforest svm
## P62962_Profilin-1                "B"   "S"          "E"
## P62918_60S ribosomal protein L8  "B"   "S"          "E"
## P62830_60S ribosomal protein L23 "B"   "S"          "E"
## P35979_60S ribosomal protein L12 "S"   "E"          "E"
## Accuracy:
##      plsda randomforest   svm
## Full 0.860        0.829 0.851
## AS   0.905        0.961 0.817
## S    0.891        0.961    NA

## 
## 
## Selecting the features for the 'proteomics_plasma' dataset:
## Significant features from 'S' groups:
##                                  plsda randomforest svm
## Q9QZ39_Alpha-N-acetylgalactosam. "S"   "S"          "S"
## Accuracy:
##      plsda randomforest   svm
## Full 0.870        0.872 0.888
## AS   0.965        0.972 0.927
## S    0.965        0.972 0.969

## 
## 
## Selecting the features for the 'metabolomics_liver_c18hypersil_pos' dataset:
## Significant features from 'S' groups:
##                plsda randomforest svm
## M420.8761T4.64 "E"   "S"          "A"
## M426.7409T4.13 "C"   "S"          "E"
## M351.6679T1.45 "E"   "E"          "S"
## M613.0296T5.64 "E"   "E"          "S"
## M613.3621T5.59 "S"   "E"          "E"
## Accuracy:
##      plsda randomforest   svm
## Full 0.988        0.998 0.988
## AS   0.997        1.000 1.000
## S    0.974        0.995 1.000

## 
## 
## Selecting the features for the 'metabolomics_liver_hilic_neg' dataset:
## Significant features from 'S' groups:
##                 plsda randomforest svm
## M698.0528T10.35 "S"   "S"          "E"
## M439.1412T2.29  "E"   "E"          "S"
## Accuracy:
##      plsda randomforest   svm
## Full 0.988        0.956 0.996
## AS   0.965        0.997 0.967
## S    0.988        0.993 0.974

## 
## 
## Selecting the features for the 'metabolomics_plasma_c18hypersil_pos' dataset:
## Significant features from 'S' groups:
##                 plsda randomforest svm
## M340.1039T5.35  "C"   "S"          "S"
## M645.3722T16.46 "D"   "S"          "B"
## M250.0387T1.05  "B"   "S"          "E"
## Accuracy:
##      plsda randomforest   svm
## Full    NA        0.646 0.764
## AS   0.825        0.963 0.819
## S       NA        0.948 0.817

## 
## 
## Selecting the features for the 'metabolomics_plasma_hilic_neg' dataset:
## Significant features from 'S' groups:
##                                    plsda randomforest svm
## M239.0025T2.44                     "A"   "E"          "S"
## M248.0239T10.65                    "B"   "S"          "E"
## M112.0123T10.19                    "S"   "E"          "E"
## M542.739T17.12                     "S"   "E"          "E"
## M145T3.3_Monomethyl.glutaric.acid. "S"   "E"          "E"
## Accuracy:
##      plsda randomforest   svm
## Full 0.930        0.860 0.931
## AS   0.952        0.947 0.848
## S    0.994        0.915 0.920

## 
## 
## Selecting the features for the 'metabolomics_plasma_c18acquity_pos' dataset:
## Significant features from 'S' groups:
##                plsda randomforest svm
## M333.9622T1.15 "S"   "S"          "E"
## Accuracy:
##      plsda randomforest   svm
## Full    NA        0.642 0.772
## AS   0.932        0.860 0.790
## S    0.894        0.839    NA

## 
## 
## Selecting the features for the 'metabolomics_plasma_c18acquity_neg' dataset:
## Significant features from 'S' groups:
##                plsda randomforest svm
## M130.0661T10.3 "S"   "S"          "A"
## Accuracy:
##      plsda randomforest   svm
## Full 0.717        0.572 0.719
## AS   0.895        0.786 0.904
## S    0.816        0.788    NA

## MX2

## 
## 
## Selecting the features for the 'preclinical' dataset:
## 
## 
## Selecting the features for the 'proteomics_liver' dataset:
## Significant features from 'S' groups:
##                                  plsda randomforest svm
## P62852_40S ribosomal protein S25 "S"   "S"          "E"
## Q64462_Cytochrome P450 4B1       "S"   "E"          "B"
## P12791_Cytochrome P450 2B10      "E"   "E"          "S"
## O55071_Cytochrome P450 2B19      "E"   "E"          "S"
## Q8R1S9_Sodium-coupled neutral a. "E"   "E"          "S"
## Accuracy:
##      plsda randomforest   svm
## Full 0.957        0.940 0.964
## AS   0.975        0.993 0.891
## S    1.000        0.986 0.925

## 
## 
## Selecting the features for the 'proteomics_plasma' dataset:
## Significant features from 'S' groups:
##                                  plsda randomforest svm
## Q9QZ39_Alpha-N-acetylgalactosam. "S"   "S"          "S"
## Accuracy:
##      plsda randomforest   svm
## Full 0.858        0.863 0.854
## AS   0.963        0.946 0.970
## S    0.963        0.946 0.962

## 
## 
## Selecting the features for the 'metabolomics_liver_c18hypersil_pos' dataset:
## Significant features from 'S' groups:
##               plsda randomforest svm
## M243.089T6.43 "S"   "S"          "E"
## Accuracy:
##      plsda randomforest   svm
## Full 0.763        0.728 0.766
## AS   0.929        0.914 0.655
## S    0.903        0.914    NA

## 
## 
## Selecting the features for the 'metabolomics_liver_hilic_neg' dataset:
## Significant features from 'S' groups:
##                plsda randomforest svm
## M242.0134T1.57 "S"   "S"          "S"
## Accuracy:
##      plsda randomforest   svm
## Full 0.796        0.769 0.794
## AS   0.927        0.894 0.886
## S    0.911        0.868 0.891

## 
## 
## Selecting the features for the 'metabolomics_plasma_c18hypersil_pos' dataset:
## Significant features from 'S' groups:
##                plsda randomforest svm
## M340.1039T5.35 "S"   "E"          "S"
## M580.6222T5.04 "B"   "S"          "E"
## Accuracy:
##      plsda randomforest   svm
## Full    NA        0.825 0.797
## AS   0.942        0.947 0.858
## S    0.844        0.943 0.892

## 
## 
## Selecting the features for the 'metabolomics_plasma_hilic_neg' dataset:
## Significant features from 'S' groups:
##                plsda randomforest svm
## M242.0135T1.57 "S"   "B"          "S"
## M898.4079T8.77 "A"   "S"          "E"
## Accuracy:
##      plsda randomforest   svm
## Full 0.836        0.809 0.843
## AS   0.931        0.939 0.929
## S    0.911        0.851 0.901

## 
## 
## Selecting the features for the 'metabolomics_plasma_c18acquity_pos' dataset:
## Significant features from 'S' groups:
##                plsda randomforest svm
## M340.103T8.44  "S"   "S"          "A"
## M290.1752T8.52 "A"   "E"          "S"
## Accuracy:
##      plsda randomforest   svm
## Full 0.722        0.815 0.723
## AS   0.944        0.957 0.927
## S    0.898        0.831 0.778

## 
## 
## Selecting the features for the 'metabolomics_plasma_c18acquity_neg' dataset:
## Significant features from 'S' groups:
##                 plsda randomforest svm
## M143.0462T1     "A"   "E"          "S"
## M129.0193T0.9   "S"   "E"          "E"
## M237.186T16.3   "E"   "E"          "S"
## M826.9307T0.8_1 "E"   "E"          "S"
## Accuracy:
##      plsda randomforest   svm
## Full 0.641        0.556 0.644
## AS   0.906        0.664 0.793
## S    0.739           NA 0.899

7 Combining

# Merging LAT and MX2 results

# Common column names which have to be individualized:
# patterns (matched as fixed substrings in the loop below) of the feature
# metadata columns which are named identically in the LAT and MX2 analyses;
# the matching columns get a '_LAT' or '_MX2' suffix before merging.
# The commented-out entries are patterns which are currently not used.
common_fvar.vc <- c("limma2ways_sex_M.F_",
                   # "limma2waysInter_sex_M.F_",
                   # "limma2waysInter_gene:sex_",
                   # "anova2ways_sex_M.F_",
                   # "anova2waysInter_sex_M.F_",
                   # "anova2waysInter_gene:sex_",
                   "limmaWT_M.F_", # proteomics_liver
                   # "limma_sex_M.F_diff_WT",        
                   # "limma_sex_M.F_BH_WT",                 
                   # "limma_sex_M.F_signif_WT",
                   # "limma_sex_M.F_", 
                   "PCA_xload-p",
                   "hclust",
                   "gene_OPLSDA_",
                   "gene_biosign_")

# For each dataset of the initial MultiDataSet, appending the feature metadata
# columns computed in the gene-specific (LAT and MX2) analyses to the initial
# feature metadata, and writing the updated ExpressionSet back.
for (set.c in names(latmx2.mset)) {
  
  # initial ExpressionSet
  eset <- latmx2.mset[[set.c]]
  
  # initial fData
  fdata.df <- Biobase::fData(eset)
  
  # initial features
  features.vc <- Biobase::featureNames(eset)
  
  for (gene.c in ProMetIS::genes.vc()) {
    
    if (set.c %in% names(gene_mset.ls[[gene.c]])) {
      
      # gene fData
      gene_fdata.df <- Biobase::fData(gene_mset.ls[[gene.c]][[set.c]])
      
      # adding a 'LAT' or 'MX2' tag at the end of columns with identical names
      # for the two 'gene-specific' analyses
      gene_fvar.vc <- colnames(gene_fdata.df)
      for (fvar.c in common_fvar.vc) {
        
        common_fvar.vi <- grep(fvar.c, gene_fvar.vc, fixed = TRUE)
        
        if (length(common_fvar.vi)) {
          gene_fvar.vc[common_fvar.vi] <- paste0(gene_fvar.vc[common_fvar.vi],
                                               "_", gene.c)
        }
      }
      colnames(gene_fdata.df) <- gene_fvar.vc
      
      # additional name simplification
      colnames(gene_fdata.df) <- gsub("gene_biosign_",
                                      "biosign_",
                                      gsub("gene_OPLSDA_",
                                           "OPLSDA_",
                                           gsub("limma2ways_gene_",
                                                "limma2ways_",
                                                gsub("limma2ways_sex_",
                                                     "limma2ways_",
                                                     colnames(gene_fdata.df)))))
      
      # merging (on row names) only the columns which are not already present
      # 'drop = FALSE': keeping a data frame (with row names) even if a single
      # new column remains, otherwise the merge on row names would fail
      fdata.df <- merge(fdata.df,
                        gene_fdata.df[, setdiff(colnames(gene_fdata.df),
                                                colnames(fdata.df)),
                                      drop = FALSE],
                        by = 0, all = TRUE, sort = FALSE)
      rownames(fdata.df) <- fdata.df[, "Row.names"]
      fdata.df[, "Row.names"] <- NULL
      
      # restoring the initial feature order (features discarded in the
      # gene-specific analysis have NA in the new columns)
      fdata.df <- fdata.df[features.vc, , drop = FALSE]
      
    }
    
  }
  
  Biobase::fData(eset) <- fdata.df
  
  # replacing the dataset by its updated version
  latmx2.mset <- MultiDataSet::add_eset(latmx2.mset,
                                          eset,
                                          dataset.type = set.c,
                                          GRanges = NA,
                                          overwrite = TRUE,
                                          warnings = FALSE)
  
}

8 Re-ordering metadata

# Processing the metadata of the datasets with the internal (non-exported)
# 'metadata_select' helper from ProMetIS, for the '3_statistics_singleomics'
# step; the result replaces 'latmx2.mset'
# NOTE(review): according to the message recorded below, the helper also
# writes supplementary metadata to a 'metadata_supp.rdata' file -- confirm
# against the helper's implementation
latmx2.mset <- ProMetIS:::metadata_select(latmx2.mset, 
                                          step.c = "3_statistics_singleomics")
## Supplementary metadata written in:
## ../inst/extdata/3_statistics_singleomics/metadata_supp.rdata

9 Saving (not run)

# Writing the datasets in the 'inst/extdata' folder of the package sources:
# the data directory prefix of the output path is replaced by the relative
# path to the sources; 'fixed = TRUE' so that the directory is matched
# literally (a path may contain regex metacharacters such as '(' or '+')
phenomis::writing(latmx2.mset,
                  gsub(ProMetIS::data_dir.c(),
                       "../../ProMetIS/inst/extdata",
                       ProMetIS::statistics_singleomics_dir.c(),
                       fixed = TRUE),
                  overwrite.l = TRUE)
## Writing the 'preclinical' dataset...
## The following file(s) have been written:
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/preclinical/dataMatrix.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/preclinical/sampleMetadata.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/preclinical/variableMetadata.tsv
## Writing the 'proteomics_liver' dataset...
## The following file(s) have been written:
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/proteomics_liver/dataMatrix.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/proteomics_liver/sampleMetadata.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/proteomics_liver/variableMetadata.tsv
## Writing the 'proteomics_plasma' dataset...
## The following file(s) have been written:
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/proteomics_plasma/dataMatrix.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/proteomics_plasma/sampleMetadata.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/proteomics_plasma/variableMetadata.tsv
## Writing the 'metabolomics_liver_c18hypersil_pos' dataset...
## The following file(s) have been written:
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_liver_c18hypersil_pos/dataMatrix.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_liver_c18hypersil_pos/sampleMetadata.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_liver_c18hypersil_pos/variableMetadata.tsv
## Writing the 'metabolomics_liver_hilic_neg' dataset...
## The following file(s) have been written:
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_liver_hilic_neg/dataMatrix.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_liver_hilic_neg/sampleMetadata.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_liver_hilic_neg/variableMetadata.tsv
## Writing the 'metabolomics_plasma_c18hypersil_pos' dataset...
## The following file(s) have been written:
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_plasma_c18hypersil_pos/dataMatrix.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_plasma_c18hypersil_pos/sampleMetadata.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_plasma_c18hypersil_pos/variableMetadata.tsv
## Writing the 'metabolomics_plasma_hilic_neg' dataset...
## The following file(s) have been written:
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_plasma_hilic_neg/dataMatrix.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_plasma_hilic_neg/sampleMetadata.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_plasma_hilic_neg/variableMetadata.tsv
## Writing the 'metabolomics_plasma_c18acquity_pos' dataset...
## The following file(s) have been written:
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_plasma_c18acquity_pos/dataMatrix.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_plasma_c18acquity_pos/sampleMetadata.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_plasma_c18acquity_pos/variableMetadata.tsv
## Writing the 'metabolomics_plasma_c18acquity_neg' dataset...
## The following file(s) have been written:
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_plasma_c18acquity_neg/dataMatrix.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_plasma_c18acquity_neg/sampleMetadata.tsv
## ../../ProMetIS/inst/extdata/3_statistics_singleomics/metabolomics_plasma_c18acquity_neg/variableMetadata.tsv
## The subfolders have been written in the directory:
## ../../ProMetIS/inst/extdata/3_statistics_singleomics